//Figure 3: Heterogeneous effects of road tolls on BEV ownership and number of cars

*===============================================================================
// Step 1: Create dataset - reduce to couples and create heterogeneity variables
*===============================================================================
use    "${dataout}MainDataset" , clear
keep if year>=2015 & year<=2017
keep if couple==1

// The file "Link_grkrets_regtype1" maps basic statistical units to aggregated region types.
// Region types are merged to basic statistical unit of residence.
sort grkrets
merge m:1 grkrets using  "${datain}Link_grkrets_regtype1", keep(match master)
drop _merge

gen CityCategory=. // Creating four categories based on region types
replace CityCategory=1 if (regtype1==1 | regtype1==3) & !missing(regtype1) // Oslo, Bergen, Trondheim or Stavanger
replace CityCategory=2 if (regtype1==2 | regtype1==4) & !missing(regtype1) // Suburbs of the four largest cities
replace CityCategory=3 if (regtype1==5 | regtype1==6) & !missing(regtype1) // Other, smaller cities
replace CityCategory=4 if (regtype1==7) & !missing(regtype1) // Rural areas

// Merging region types to basic statistical unit of the workplace.
forvalues i=1/2 {
	preserve
		use "${datain}Link_grkrets_regtype1" , clear
		rename grkrets grk_bed`i'
		rename regtype1 regtype1_bed`i'
		tempfile helpfile_`i'
		save `helpfile_`i''
	restore
	sort grk_bed`i'
	merge m:1 grk_bed`i' using  `helpfile_`i'', keep(match master)
	drop _merge
}

forvalues i=1/2 {
	gen CityCategory_bed`i' = . // Creating four categories based on region types (workplace of each spouse)
	replace CityCategory_bed`i'=1 if (regtype1_bed`i'==1 | regtype1_bed`i'==3) & !missing(regtype1_bed`i')
	replace CityCategory_bed`i'=2 if (regtype1_bed`i'==2 | regtype1_bed`i'==4) & !missing(regtype1_bed`i')
	replace CityCategory_bed`i'=3 if (regtype1_bed`i'==5 | regtype1_bed`i'==6) & !missing(regtype1_bed`i')
	replace CityCategory_bed`i'=4 if (regtype1_bed`i'==7) & !missing(regtype1_bed`i')
}

* === Interaction variable for geography =======================================
gen commuting_dir = .
replace commuting_dir = 1 if CityCategory == 1 													 // Within city
replace commuting_dir = 2 if CityCategory == 1 & CityCategory_bed1 != 1 & CityCategory_bed1 != . // From city
replace commuting_dir = 2 if CityCategory == 1 & CityCategory_bed2 != 1 & CityCategory_bed2 != . // From city
replace commuting_dir = 3 if CityCategory == 2 													 // Within suburb
replace commuting_dir = 4 if CityCategory == 2 & CityCategory_bed1 == 1 						 // Suburb to city
replace commuting_dir = 4 if CityCategory == 2 & CityCategory_bed2 == 1 						 // Suburb to city
replace commuting_dir = 5 if CityCategory == 3 													 // Other cities
replace commuting_dir = 6 if CityCategory == 4 													 // Rural areas

* === Interaction variable for income ==========================================
egen wies_fam_sum =rowtotal(wies1 wies2), missing
xtile wies_fam_sum_quintile =  wies_fam_sum, nq(5)
compress

*===============================================================================
// Step 2: Run regressions, four different outcomes, save as ster-files
*===============================================================================
local outcomelist BEV_fam_yes car_fam_count // car_fam_yes ICE_fam_count 
local treatvar1list toll_fam_mean         
local treatvar2list ptl_fam_km_mean    

** Regressions - same regression new baseline
foreach outcome in `outcomelist'{
	foreach treatvar1 in `treatvar1list'{
		foreach treatvar2 in `treatvar2list'{
			forvalues i = 1/5 { // interactions betw treatment var and income
				gen tr1_inc_q`i' = `treatvar1' * (wies_fam_sum_quintile == `i')
				gen tr2_inc_q`i' = `treatvar2' * (wies_fam_sum_quintile == `i')
			}

			forvalues i = 1/6 { // interactions betw treatment var and geography
				gen tr1_geo`i' = `treatvar1' * (commuting_dir == `i')	
				gen tr2_geo`i' = `treatvar2' * (commuting_dir == `i')	
			}
			compress	
			reghdfe `outcome' 									/// Outcome variable
				`treatvar1' `treatvar2' 						/// Treatvars, baseline values: income quantile 5 and living+working in large city
				tr1_inc_q1 tr1_inc_q2 tr1_inc_q3 tr1_inc_q4  	/// Treatvar1 by income groups
				tr2_inc_q1 tr2_inc_q2 tr2_inc_q3 tr2_inc_q4  	/// Treatvar2 by income groups
				tr1_geo2 tr1_geo3 tr1_geo4 tr1_geo5 tr1_geo6 	/// Treatvar1 by geography
				tr2_geo2 tr2_geo3 tr2_geo4 tr2_geo5 tr2_geo6 	/// Treatvar2 by geography  
				$age  $distance  $time  $publictime  $publicquality, /// Controls
				absorb($FE   $household  $income  $employment  $education i.year) /// Fixed effects and additional absorbed controls
				vce(cluster $clustervar) 						/// multi-dimensional clustering
			summarize `outcome' if e(sample)==1
			estadd scalar MeanDep = r(mean), replace
			summarize `treatvar1' if e(sample)==1
			estadd scalar Treat1 = r(mean), replace
			summarize `treatvar2' if e(sample)==1
			estadd scalar Treat2 = r(mean), replace
			estadd local year="2015-17" , replace
			estadd local grkFE="\checkmark", replace
			estadd local grkbFE="\checkmark", replace
			estadd local HHcon="\checkmark", replace
			estadd local HHWork="\checkmark", replace
			estadd local PublicTime="\checkmark", replace
			estadd local grkFE_W="Yes", replace
			estadd local grkbFE_W="Yes", replace
			estadd local HHcon_W="Yes", replace
			estadd local HHWork_W="Yes", replace
			estadd local PublicTime_W="Yes", replace
			eststo reg_pooled_`outcome'_`y' 
			estimates save "${ster}hetero_reg_O_`outcome'" , replace

			drop tr1_*
			drop tr2_*
		}
	}
}


*===============================================================================
// Step 3: Load ster files, make figures
*===============================================================================
forvalues i = 1/5 { // Re-create heterogeneous treatment variables
gen tr1_inc_q`i' = toll_fam_mean   * (wies_fam_sum_quintile == `i')
gen tr2_inc_q`i' = ptl_fam_km_mean * (wies_fam_sum_quintile == `i')
}
forvalues i = 1/6 { // Re-create heterogeneous treatment variables
gen tr1_geo`i' = toll_fam_mean   * (commuting_dir == `i')	
gen tr2_geo`i' = ptl_fam_km_mean * (commuting_dir == `i')	
}

* Loading estimates for the outcome "BEV (yes = 1)"
capt estimates drop *
estimates use "${ster}hetero_reg_O_BEV_fam_yes"
estimates store c

* Marking sample
estimates esample: BEV_fam_yes toll_fam_mean ptl_fam_km_mean ///
	tr1_* tr2_* year age1 age2 dist1 dist2 time1 time2 ///
	PublicTransitTime_1 PublicTransitTime_2 VENTETID_1  BOARDINGS_1 TAKST_1 ///
	VENTETID_2 BOARDINGS_2 TAKST_2 grkrets_num grk_bed1_num grk_bed2_num ///
	children antpers_i_regstat_famnr secondhome wies1_decile wies2_decile ///
	wealth1_decile wealth2_decile employed1 employed2 retired1 retired2 grputd1 grputd2

sum year if e(sample) == 1 	// Based on sample ...
scalar N_tot = r(N)			// ...save N as scalar
forvalues i = 1/5 {			// For each income group
	forvalues j = 1/6 {		// For each geographic group
		qui sum year if e(sample) == 1 & wies_fam_sum_quintile == `i' & commuting_dir == `j'
		qui scalar N_inc`i'_geo`j' = r(N) 	// number of people per income-geography combination
		qui sum year if e(sample) == 1 & wies_fam_sum_quintile == `i'
		qui scalar N_inc`i' = r(N) 			// Number of people per income group
		qui sum year if e(sample) == 1 & commuting_dir == `j'
		qui scalar N_geo`j' = r(N) 			// Number of people per geography group
	}
	
}

*===============================================================================
// Figure 3a - BEV ownership by income
*===============================================================================
/* Creating actual variables to store estimates in for convenience */
gen plot_coef = .
gen plot_se = .
gen plot_ci1 = .
gen plot_ci2 = .
gen plot_axis = .
/* The effect of treatment per income group also depends on geography. Therefore,
for each income group estimate, add the average effect of geography, using sample
shares as weights */
forvalues i = 1/5 {
if `i' == 5 {
	lincom toll_fam_mean 			/// Within city, income = 5 (base category)
		+ tr1_geo2 * N_geo2/N_tot 	/// From city
		+ tr1_geo3 * N_geo3/N_tot 	/// Suburbs
		+ tr1_geo4 * N_geo4/N_tot 	/// Suburbs to city
		+ tr1_geo5 * N_geo5/N_tot 	/// Smaller cities
		+ tr1_geo6 * N_geo6/N_tot   //  Rural areas
} 
else {	
	lincom toll_fam_mean 			/// Within city, income = 5 (base category)
		+ tr1_inc_q`i' 				/// Plus additional effect of income group `i'
		+ tr1_geo2 * N_geo2/N_tot 	/// From city
		+ tr1_geo3 * N_geo3/N_tot 	/// Suburbs
		+ tr1_geo4 * N_geo4/N_tot 	/// Suburbs to city
		+ tr1_geo5 * N_geo5/N_tot 	/// Smaller cities
		+ tr1_geo6 * N_geo6/N_tot   //  Rural areas  
	}
replace plot_coef = r(estimate) in `i' 	// The linear combinations of coefficients stored here
replace plot_se   = r(se) in `i'		// Estimates of standard errors stored here
replace plot_axis = `i' in `i' 			// X-axis (1-5)
}

replace plot_ci1 = plot_coef - 1.96*plot_se // Creating lower 95% bound manually
replace plot_ci2 = plot_coef + 1.96*plot_se // Creating upper 95% bound manually

twoway 									///	Figure of linear combinations of parameters
	(rcap plot_ci2 plot_ci1 plot_axis, 	/// Plotting confidence intervals
		sort color(black)) 				///			
	(scatter plot_coef plot_axis, 		/// Plotting point estimates
		sort lcolor(black) mcolor(black) lpattern(solid)), 	///	
	graphregion(color(white)) scale(1.2)					///
	xtitle("Income quintile") ytitle("BEV (yes=1)") 		///
	name(figure3a, replace) 								///
	legend(off) yline(0, lcol(gs10) lpattern(solid)) 		///
	ylab(0.0(0.0005)0.0025, angle(vertical) gstyle(solid))  ///
	xlab(,nogrid)
	
graph export     "${figures}Figure3a.png" , replace
graph export     "${figures}Figure3a.pdf" , replace
graph save       "${figures}Figure3a.gph" , replace	

*===============================================================================
// Figure 3c - BEV ownership by geography
*===============================================================================
/* Resetting variables of storing estimates */
replace plot_coef = .
replace plot_se = .
replace plot_ci1 = .
replace plot_ci2 = .
replace plot_axis = .
/* The effect of treatment per geographic group also depends on income. Therefore,
for each geography estimate, add the average effect of income, using sample
shares as weights */
forvalues i = 1/6 {
if `i' == 1 {
	lincom toll_fam_mean 			/// Income = 5, within city (base category)
		+ tr1_inc_q1 * N_inc1/N_tot /// Income = 1
		+ tr1_inc_q2 * N_inc2/N_tot /// Income = 2
		+ tr1_inc_q3 * N_inc3/N_tot /// Income = 3
		+ tr1_inc_q4 * N_inc4/N_tot //  Income = 4
} 
else {	
	lincom toll_fam_mean 			/// Income = 5, within city (base category)
		+ tr1_geo`i' 				/// Plus additional effect of geographic group `i'
		+ tr1_inc_q1 * N_inc1/N_tot /// Income = 1
		+ tr1_inc_q2 * N_inc2/N_tot /// Income = 2
		+ tr1_inc_q3 * N_inc3/N_tot /// Income = 3
		+ tr1_inc_q4 * N_inc4/N_tot //  Income = 4
	}
replace plot_coef = r(estimate) in `i'	// The linear combinations of coefficients stored here
replace plot_se   = r(se) in `i'		// Estimates of standard errors stored here
replace plot_axis = `i' in `i' 			// X-axis (1-6)
}

replace plot_ci1 = plot_coef - 1.96*plot_se // Creating lower 95% bound manually
replace plot_ci2 = plot_coef + 1.96*plot_se // Creating upper 95% bound manually

twoway 									///	Figure of linear combinations of parameters
	(rcap plot_ci2 plot_ci1 plot_axis, 	/// Plotting confidence intervals
		sort color(black)) 				///			
	(scatter plot_coef plot_axis, 		/// Plotting point estimates
		sort lcolor(black) mcolor(black) lpattern(solid)), 	///	
	graphregion(color(white)) scale(1.2)					///
	xtitle("") ytitle("BEV (yes=1)") 						///
	xlabel(1 `""Within" "city""' 	/// Labels matching "commuting_dir" variable
		2 `""From" "city""' 		///
		3 `"Suburbs"' 				///
		4 `""Suburbs" "to city""' 	///
		5 `""Smaller" "cities""' 	///
		6 `""Rural" "areas""') 		///
	name(figure3c, replace) 		///
	legend(off) yline(0, lcol(gs10) lpattern(solid)) 		///
	ylab(0.0(0.0005)0.0025, angle(vertical) gstyle(solid)) 	///
	xlab(,nogrid)
	
graph export     "${figures}Figure3c.png" , replace
graph export     "${figures}Figure3c.pdf" , replace
graph save       "${figures}Figure3c.gph" , replace		

*===============================================================================
// Figure 3b - car ownership by income
*===============================================================================
* Loading estimates for the outcome "Number of cars"
capt estimates drop *
estimates use "${ster}hetero_reg_O_car_fam_count"
estimates store c

* Marking sample
estimates esample: car_fam_count toll_fam_mean ptl_fam_km_mean ///
	tr1_* tr2_* year age1 age2 dist1 dist2 time1 time2 ///
	PublicTransitTime_1 PublicTransitTime_2 VENTETID_1  BOARDINGS_1 TAKST_1 ///
	VENTETID_2 BOARDINGS_2 TAKST_2 grkrets_num grk_bed1_num grk_bed2_num ///
	children antpers_i_regstat_famnr secondhome wies1_decile wies2_decile ///
	wealth1_decile wealth2_decile employed1 employed2 retired1 retired2 grputd1 grputd2
* Creating weights
sum year if e(sample) == 1
scalar N_tot = r(N)
forvalues i = 1/5 {
	forvalues j = 1/6 {
		qui sum year if e(sample) == 1 & wies_fam_sum_quintile == `i' & commuting_dir == `j'
		qui scalar N_inc`i'_geo`j' = r(N) 	// number of people per income-geography combination 
		qui sum year if e(sample) == 1 & wies_fam_sum_quintile == `i'
		qui scalar N_inc`i' = r(N)			// Number of people per income group
		qui sum year if e(sample) == 1 & commuting_dir == `j'
		qui scalar N_geo`j' = r(N)			// Number of people per geography group
	}
	
}
/* Resetting variables of storing estimates */
replace plot_coef = .
replace plot_se = .
replace plot_ci1 = .
replace plot_ci2 = .
replace plot_axis = .
/* The effect of treatment per income group also depends on geography. Therefore,
for each income group estimate, add the average effect of geography, using sample
shares as weights */
forvalues i = 1/5 {
if `i' == 5 {
	lincom toll_fam_mean 			/// Within city, income = 5 (base category)
		+ tr1_geo2 * N_geo2/N_tot 	/// From city
		+ tr1_geo3 * N_geo3/N_tot 	/// Suburbs
		+ tr1_geo4 * N_geo4/N_tot 	/// Suburbs to city
		+ tr1_geo5 * N_geo5/N_tot 	/// Smaller cities
		+ tr1_geo6 * N_geo6/N_tot   //  Rural areas
} 
else {	
	lincom toll_fam_mean 			/// Within city, income = 5 (base category)
		+ tr1_inc_q`i' 				/// Plus additional effect of income group `i'
		+ tr1_geo2 * N_geo2/N_tot 	/// From city
		+ tr1_geo3 * N_geo3/N_tot 	/// Suburbs
		+ tr1_geo4 * N_geo4/N_tot 	/// Suburbs to city
		+ tr1_geo5 * N_geo5/N_tot 	/// Smaller cities
		+ tr1_geo6 * N_geo6/N_tot   //  Rural areas  
	}
replace plot_coef = r(estimate) in `i' 	// The linear combinations of coefficients stored here
replace plot_se   = r(se) in `i'		// Estimates of standard errors stored here
replace plot_axis = `i' in `i' 			// X-axis (1-5)
}

replace plot_ci1 = plot_coef - 1.96*plot_se // Creating lower 95% bound manually
replace plot_ci2 = plot_coef + 1.96*plot_se // Creating upper 95% bound manually

twoway 									///	Figure of linear combinations of parameters
	(rcap plot_ci2 plot_ci1 plot_axis, 	/// -Plotting confidence intervals
		sort color(black)) 				///			
	(scatter plot_coef plot_axis, 		/// -Plotting point estimates
		sort lcolor(black) mcolor(black) lpattern(solid)), 	///	
	graphregion(color(white)) scale(1.2)					///
	xtitle("Income quintile") ytitle("Number of cars") 		///
	name(figure3b, replace) 								///
	legend(off) yline(0, lcol(gs10) lpattern(solid)) 		///
	ylab(-0.0030(0.001)0.0010, angle(vertical) gstyle(solid)) /// 
	xlab(,nogrid)
	
graph export     "${figures}Figure3b.png" , replace
graph export     "${figures}Figure3b.pdf" , replace
graph save       "${figures}Figure3b.gph" , replace	

*===============================================================================
// Figure 3d - Car ownership by geography
*===============================================================================
/* Resetting variables of storing estimates */
replace plot_coef = .
replace plot_se = .
replace plot_ci1 = .
replace plot_ci2 = .
replace plot_axis = .
/* The effect of treatment per geographic group also depends on income. Therefore,
for each geography estimate, add the average effect of income, using sample
shares as weights */
forvalues i = 1/6 {
if `i' == 1 {
	lincom toll_fam_mean 			/// Income = 5, within city (base category)
		+ tr1_inc_q1 * N_inc1/N_tot /// Income = 1
		+ tr1_inc_q2 * N_inc2/N_tot /// Income = 2
		+ tr1_inc_q3 * N_inc3/N_tot /// Income = 3
		+ tr1_inc_q4 * N_inc4/N_tot //  Income = 4
} 
else {	
	lincom toll_fam_mean 			/// Income = 5, within city (base category)
		+ tr1_geo`i' 				/// Plus additional effect of geographic group `i'
		+ tr1_inc_q1 * N_inc1/N_tot /// Income = 1
		+ tr1_inc_q2 * N_inc2/N_tot /// Income = 2
		+ tr1_inc_q3 * N_inc3/N_tot /// Income = 3
		+ tr1_inc_q4 * N_inc4/N_tot //  Income = 4
	}
replace plot_coef = r(estimate) in `i'	// The linear combinations of coefficients stored here
replace plot_se   = r(se) in `i'		// Estimates of standard errors stored here
replace plot_axis = `i' in `i' 			// X-axis (1-6)
}

replace plot_ci1 = plot_coef - 1.96*plot_se // Creating lower 95% bound manually
replace plot_ci2 = plot_coef + 1.96*plot_se // Creating upper 95% bound manually

twoway 									///	Figure of linear combinations of parameters
	(rcap plot_ci2 plot_ci1 plot_axis, 	/// Plotting confidence intervals
		sort color(black)) 				///			
	(scatter plot_coef plot_axis, 		/// Plotting point estimates
		sort lcolor(black) mcolor(black) lpattern(solid)), 	///	
	graphregion(color(white)) scale(1.2)					///
	xtitle("") ytitle("Number of cars") 					///
	xlabel(1 `""Within" "city""' /// Labels matching "commuting_dir" variable
		2 `""From" "city""' 		///
		3 `"Suburbs"' 				///
		4 `""Suburbs" "to city""' 	///
		5 `""Smaller" "cities""' 	///
		6 `""Rural" "areas""') 		///
	name(figure3d, replace) 		///
	legend(off) yline(0, lcol(gs10) lpattern(solid)) 		///
	ylab(-0.0030(0.001)0.0010, angle(vertical) gstyle(solid)) ///
	xlab(,nogrid)
	
graph export     "${figures}Figure3d.png" , replace
graph export     "${figures}Figure3d.pdf" , replace
graph save       "${figures}Figure3d.gph" , replace		